Grafici andamento Covid-19

Data e Ora ultimo aggiornamento

In [1]:
import datetime

print(datetime.datetime.today())
2020-11-11 09:13:00.628852
In [2]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Clicca qui per nascondere / mostrare il codice"></form>''')
Out[2]:
In [3]:
import pandas as pd
import numpy as np
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px


import warnings
warnings.filterwarnings('ignore')
In [4]:
url_r = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv"
data_region = pd.read_csv(url_r)
#print(data_region.dtypes)
#print(data_region.isnull().sum())
#print(data_region.shape)
#print(data_region.head())

url_p = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-province/dpc-covid19-ita-province.csv"
data_province = pd.read_csv(url_p)
#print(data_province.dtypes)
#print(data_province.isnull().sum())
#print(data_province.shape)
#print(data_province.head())

url_n = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale.csv"
data_national = pd.read_csv(url_n)
#print(data_national.dtypes)
#print(data_national.isnull().sum())
#print(data_national.shape)
#print(data_national.head())

Tabella dei dati degli ultimi giorni

In [5]:
#add daily cases/deaths/recovered columns
data_national['new_cases'] = data_national['totale_casi'].diff()
data_national['new_deaths'] = data_national['deceduti'].diff()
data_national['new_recovered'] = data_national['dimessi_guariti'].diff()
data_national['new_swabs'] = data_national['tamponi'].diff()
data_national['new_unique_tested'] = data_national['casi_testati'].diff()
#add a day/day-1 percentage change for new_cases
data_national['daily_cases_perc_change'] = round((data_national['new_cases'].pct_change(1))*100,2)
data_national['daily_swab_perc_change'] = round((data_national['new_swabs'].pct_change(1))*100,2)
data_national['daily_unique_tested_perc_change'] = round((data_national['new_unique_tested'].pct_change(1))*100,2)
#detect ratio
data_national['detect_ratio_swabs'] = round((data_national['new_cases'] / data_national['new_swabs'])*100,2)
data_national['detect_ratio_cases'] = round((data_national['new_cases'] / data_national['new_unique_tested'])*100,2)
data_national.tail(10)
Out[5]:
data stato ricoverati_con_sintomi terapia_intensiva totale_ospedalizzati isolamento_domiciliare totale_positivi variazione_totale_positivi nuovi_positivi dimessi_guariti ... new_cases new_deaths new_recovered new_swabs new_unique_tested daily_cases_perc_change daily_swab_perc_change daily_unique_tested_perc_change detect_ratio_swabs detect_ratio_cases
251 2020-11-01T17:00:00 ITA 18902 1939 20841 357288 378129 26743 29907 292380 ... 29905.0 208.0 2954.0 183457.0 117478.0 -5.83 -15.02 -11.19 16.30 25.46
252 2020-11-02T17:00:00 ITA 19840 2022 21862 374650 396512 18383 22253 296017 ... 22253.0 233.0 3637.0 135731.0 87663.0 -25.59 -26.01 -25.38 16.39 25.38
253 2020-11-03T17:00:00 ITA 21114 2225 23339 394803 418142 21630 28244 302275 ... 28241.0 353.0 6258.0 182287.0 109932.0 26.91 34.30 25.40 15.49 25.69
254 2020-11-04T17:00:00 ITA 22116 2292 24408 418827 443235 25093 30550 307378 ... 30548.0 335.0 5103.0 211831.0 127500.0 8.17 16.21 15.98 14.42 23.96
255 2020-11-05T17:00:00 ITA 23256 2391 25647 446701 472348 29113 34505 312339 ... 34502.0 445.0 4961.0 219884.0 134550.0 12.94 3.80 5.53 15.69 25.64
256 2020-11-06T17:00:00 ITA 24005 2515 26520 472598 499118 26770 37809 322925 ... 37802.0 446.0 10586.0 234245.0 134566.0 9.56 6.53 0.01 16.14 28.09
257 2020-11-07T17:00:00 ITA 25109 2634 27743 504793 532536 33418 39811 328891 ... 39809.0 425.0 5966.0 231673.0 137646.0 5.31 -1.10 2.29 17.18 28.92
258 2020-11-08T17:00:00 ITA 26440 2749 29189 529447 558636 26100 32616 335074 ... 32614.0 331.0 6183.0 191144.0 119249.0 -18.07 -17.49 -13.37 17.06 27.35
259 2020-11-09T17:00:00 ITA 27636 2849 30485 542849 573334 14698 25271 345289 ... 25269.0 356.0 10215.0 147725.0 88701.0 -22.52 -22.72 -25.62 17.11 28.49
260 2020-11-10T17:00:00 ITA 28633 2971 31604 558506 590110 16776 35098 363023 ... 35090.0 580.0 17734.0 217758.0 129814.0 38.87 47.41 46.35 16.11 27.03

10 rows × 27 columns

In [6]:
#regional data preparation

data_region_Abruzzo = data_region[(data_region['denominazione_regione'] == 'Abruzzo')]
data_region_Basilicata = data_region[(data_region['denominazione_regione'] == 'Basilicata')]
data_region_Bolzano = data_region[(data_region['denominazione_regione'] == 'P.A. Bolzano')]
data_region_Calabria = data_region[(data_region['denominazione_regione'] == 'Calabria')]
data_region_Campania = data_region[(data_region['denominazione_regione'] == 'Campania')]
data_region_EmiliaR = data_region[(data_region['denominazione_regione'] == 'Emilia-Romagna')]
data_region_Friuli = data_region[(data_region['denominazione_regione'] == 'Friuli Venezia Giulia')]
data_region_Lazio = data_region[(data_region['denominazione_regione'] == 'Lazio')]
data_region_Liguria = data_region[(data_region['denominazione_regione'] == 'Liguria')]
data_region_Lombardia = data_region[(data_region['denominazione_regione'] == 'Lombardia')]
data_region_Marche = data_region[(data_region['denominazione_regione'] == 'Marche')]
data_region_Molise = data_region[(data_region['denominazione_regione'] == 'Molise')]
data_region_Piemonte = data_region[(data_region['denominazione_regione'] == 'Piemonte')]
data_region_Puglia = data_region[(data_region['denominazione_regione'] == 'Puglia')]
data_region_Sardegna = data_region[(data_region['denominazione_regione'] == 'Sardegna')]
data_region_Sicilia = data_region[(data_region['denominazione_regione'] == 'Sicilia')]
data_region_Toscana = data_region[(data_region['denominazione_regione'] == 'Toscana')]
data_region_Trento = data_region[(data_region['denominazione_regione'] == 'P.A. Trento')]
data_region_Umbria = data_region[(data_region['denominazione_regione'] == 'Umbria')]
data_region_VAosta = data_region[(data_region['denominazione_regione'] == "Valle d'Aosta")]
data_region_Veneto = data_region[(data_region['denominazione_regione'] == 'Veneto')]

def region_apply(region):
    for x in region:
        x['new_cases'] =  x['totale_casi'].diff()
        x['new_deaths'] = x['deceduti'].diff()
        x['new_recovered'] = x['dimessi_guariti'].diff()
        x['new_swabs'] = x['tamponi'].diff()
        #add a day/day-1 percentage change for new_cases
        x['daily_cases_perc_change'] = round((x['new_cases'].pct_change(1))*100,2)
        x['daily_swab_perc_change'] = round((x['new_swabs'].pct_change(1))*100,2)
        #detect ratio
        x['detect_ratio'] = round((x['new_cases'] / x['new_swabs'])*100,2)
        return; 

region_apply([data_region_Abruzzo])  
region_apply([data_region_Basilicata]) 
region_apply([data_region_Bolzano])
region_apply([data_region_Calabria])
region_apply([data_region_Campania])
region_apply([data_region_EmiliaR])
region_apply([data_region_Friuli])
region_apply([data_region_Lazio])
region_apply([data_region_Liguria])
region_apply([data_region_Lombardia])
region_apply([data_region_Marche])
region_apply([data_region_Molise])
region_apply([data_region_Piemonte])
region_apply([data_region_Puglia])
region_apply([data_region_Sardegna])
region_apply([data_region_Sicilia])
region_apply([data_region_Toscana])
region_apply([data_region_Trento])
region_apply([data_region_VAosta])
region_apply([data_region_Veneto])
In [7]:
data_region_Nordovest = data_region[(data_region.denominazione_regione.isin(['Piemonte', 'Lombardia', 'Liguria',"Valle d'Aosta"]))]
data_region_Nordest = data_region[(data_region.denominazione_regione.isin(['Emilia-Romagna', 'P.A. Bolzano', 'P.A. Trento', 'Veneto', 'Friuli Venezia Giulia']))]
data_region_Centro = data_region[(data_region.denominazione_regione.isin(['Toscana', 'Umbria', 'Marche', 'Lazio']))]
data_region_Sudisole = data_region[(data_region.denominazione_regione.isin(['Abruzzo', 'Molise', 'Campania', 'Puglia', 'Basilicata', 'Calabria', 'Sicilia', 'Sardegna']))]

cases_Nordovest = data_region_Nordovest.groupby('data').sum()
region_apply([cases_Nordovest])  
cases_Nordovest['data'] = cases_Nordovest.index

cases_Nordest = data_region_Nordest.groupby('data').sum()
region_apply([cases_Nordest])  
cases_Nordest['data'] = cases_Nordest.index

cases_Centro = data_region_Centro.groupby('data').sum()
region_apply([cases_Centro])  
cases_Centro['data'] = cases_Centro.index

cases_Sudisole = data_region_Sudisole.groupby('data').sum()
region_apply([cases_Sudisole])  
cases_Sudisole['data'] = cases_Sudisole.index

cases_Nordovest.tail(5)
Out[7]:
codice_regione lat long ricoverati_con_sintomi terapia_intensiva totale_ospedalizzati isolamento_domiciliare totale_positivi variazione_totale_positivi nuovi_positivi ... tamponi casi_testati new_cases new_deaths new_recovered new_swabs daily_cases_perc_change daily_swab_perc_change detect_ratio data
data
2020-11-06T17:00:00 13 180.689065 33.123883 10903 929 11832 167310 179142 9269 16068 ... 4809683 2935113.0 16068.0 182.0 6617.0 74180.0 20.68 13.23 21.66 2020-11-06T17:00:00
2020-11-07T17:00:00 13 180.689065 33.123883 11392 985 12377 181030 193407 14265 17135 ... 4879288 2971601.0 17135.0 195.0 2675.0 69605.0 6.64 -6.17 24.62 2020-11-07T17:00:00
2020-11-08T17:00:00 13 180.689065 33.123883 12084 1049 13133 188054 201187 7780 11143 ... 4937629 3005701.0 11143.0 157.0 3206.0 58341.0 -34.97 -16.18 19.10 2020-11-08T17:00:00
2020-11-09T17:00:00 13 180.689065 33.123883 12500 1089 13589 189113 202702 1515 8392 ... 4975586 3026856.0 8392.0 171.0 6706.0 37957.0 -24.69 -34.94 22.11 2020-11-09T17:00:00
2020-11-10T17:00:00 13 180.689065 33.123883 12873 1140 14013 195962 209975 7273 15913 ... 5046251 3066740.0 15913.0 243.0 8397.0 70665.0 89.62 86.17 22.52 2020-11-10T17:00:00

5 rows × 25 columns

Andamento Nazionale

In [8]:
fig2 = px.bar(data_national, x='data', y='totale_casi',
             hover_data=['totale_casi'], color='totale_casi',
             height=600, color_continuous_scale='Sunsetdark')

fig2.update_layout(title_text='Total COVID19 Cases - Italy',
                  xaxis_rangeslider_visible=True)
fig2.update_yaxes(tick0=0, dtick=25000,  gridcolor='White')
fig2.show()
In [9]:
fig22 = px.bar(data_national, x='data', y='totale_positivi',
             hover_data=['totale_positivi'], color='totale_positivi',
             height=600, color_continuous_scale='Sunsetdark')

fig22.update_layout(title_text='Active COVID19 Cases - Italy',
                  xaxis_rangeslider_visible=True)
fig22.update_yaxes(tick0=0, dtick=10000,  gridcolor='White')
fig22.show()

Andamento per zone d'Italia

In [10]:
fig = go.Figure()

fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Nordovest['data'], y=cases_Nordovest['new_cases'], name="North-West",
                         line_color='red'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Nordest['data'], y=cases_Nordest['new_cases'], name="North-East",
                         line_color='green'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Centro['data'], y=cases_Centro['new_cases'], name="Center",
                         line_color='darkviolet'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Sudisole['data'], y=cases_Sudisole['new_cases'], name="South and Islands",
                         line_color='darkblue'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_cases'], name="All Italy",
                         line_color='deepskyblue'))

fig.update_layout(title_text='Daily Coronavirus new cases - All Italy and Regions',
                  xaxis_rangeslider_visible=True)


fig.show()
In [11]:
fig = go.Figure()

fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Nordovest['data'], y=cases_Nordovest['new_swabs'], name="North-West",
                         line_color='red'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Nordest['data'], y=cases_Nordest['new_swabs'], name="North-East",
                         line_color='green'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Centro['data'], y=cases_Centro['new_swabs'], name="Center",
                         line_color='darkviolet'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Sudisole['data'], y=cases_Sudisole['new_swabs'], name="South and Islands",
                         line_color='darkblue'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_swabs'], name="All Italy",
                         line_color='deepskyblue'))

fig.update_layout(title_text='Daily swabs - All Italy and Regions',
                  xaxis_rangeslider_visible=True)


fig.show()
In [12]:
fig = go.Figure()


fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_deaths'], name="Daily Deaths",
                         line_color='red'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_recovered'], name="Daily Recovered",
                         line_color='green'))



fig.update_layout(title_text='Daily Coronavirus Deaths and Recoveries - Italy',
                  xaxis_rangeslider_visible=True)

fig.update_yaxes(tick0=0, dtick=500)

fig.show()
In [13]:
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_cases'], name="Daily Cases",
                         line_color='deepskyblue'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_swabs'], name="Daily swabs",
                         line_color='purple'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_unique_tested'], name="Daily unique tested",
                         line_color='red'))
fig.update_layout(title_text='Daily Coronavirus new cases and swabs - Italy',
                  xaxis_rangeslider_visible=True)

fig.update_yaxes(tick0=0, dtick=10000)

fig.show()
In [14]:
fig3 = go.Figure()

fig3.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['detect_ratio_swabs'], name="Daily detect ratio - Italy",
                         line_color='purple'))
fig3.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['detect_ratio_cases'], name="Daily unique detect ratio - Italy",
                         line_color='red'))


fig3.update_layout(title_text="Daily Swabs detect ratio - Italy",
                  xaxis_rangeslider_visible=True)
fig3.update_yaxes(dtick=5)
In [15]:
fig4 = go.Figure()

#fig4.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['terapia_intensiva'], name="Daily total UTI - Italy",
#                         line_color='blue'))
fig4.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['totale_ospedalizzati'], name="Daily total Hospital - Italy",
                         line_color='green'))

fig4.update_layout(title_text="Daily Total Hospital - Italy",
                  xaxis_rangeslider_visible=True)
fig4.update_yaxes(dtick=2000)
In [16]:
fig5 = go.Figure()

fig5.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['terapia_intensiva'], name="Daily total UTI - Italy",
                         line_color='blue'))

fig5.update_layout(title_text="Daily Total UTI - Italy",
                  xaxis_rangeslider_visible=True)
fig5.update_yaxes(dtick=200)
In [17]:
fig6 = go.Figure()

fig6.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['daily_cases_perc_change'], name="Daily cases percentual change - Italy",
                         line_color='purple'))
fig6.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['daily_swab_perc_change'], name="Daily swab percentual change - Italy",
                         line_color='red'))


fig6.update_layout(title_text="Daily v- Italy",
                  xaxis_rangeslider_visible=True)
fig6.update_yaxes(dtick=40)
In [18]:
fig7 = go.Figure()

fig7.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_recovered'], name="Daily new recovered - Italy",
                         line_color='purple'))
fig7.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_deaths'], name="Daily new deaths - Italy",
                         line_color='red'))
fig7.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_cases'], name="Daily new cases - Italy",
                         line_color='green'))

fig7.update_layout(title_text="Daily change - Italy",
                  xaxis_rangeslider_visible=True)
fig7.update_yaxes(dtick=500)
In [19]:
fig8 = go.Figure()

fig8.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['terapia_intensiva'], name="Daily total UTI - Italy",
                         line_color='purple'))
fig8.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_deaths'], name="Daily new deaths - Italy",
                         line_color='red'))


fig8.update_layout(title_text="Daily UTI vs  Death - Italy",
                  xaxis_rangeslider_visible=True)
fig8.update_yaxes(dtick=200)

Andamento Provincia di Genova

In [20]:
data_ge=data_province[data_province['sigla_provincia']=='GE']
#print(data_ge)
fig9 = go.Figure()

fig9.add_trace(go.Scatter(mode = "lines+markers", x=data_ge['data'], y=data_ge['totale_casi'], name="Daily cases GE - Italy",
                         line_color='red'))



fig9.update_layout(title_text="Daily cases GE - Italy",
                  xaxis_rangeslider_visible=True)
fig9.update_yaxes(dtick=1000)

Andamento Regionale

In [21]:
fig10 = go.Figure()

fig10.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['totale_casi'], name="Daily cases Liguria - Italy",
                         line_color='red'))



fig10.update_layout(title_text="Daily cases Liguria - Italy",
                  xaxis_rangeslider_visible=True)
fig10.update_yaxes(dtick=1000)
In [22]:
fig11 = go.Figure()

fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['terapia_intensiva'], name="Daily UTI Liguria - Italy",
                         line_color='red'))
fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['ricoverati_con_sintomi'], name="Daily hospital Liguria - Italy",
                         line_color='purple'))
fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_deaths'], name="Daily new deaths Liguria - Italy",
                         line_color='green'))
fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['nuovi_positivi'], name="Daily new cases Liguria - Italy",
                         line_color='blue'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_swabs'], name="Daily new swabs Liguria - Italy",
#line_color='black'))
fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_recovered'], name="Daily new recovered Liguria - Italy",
                         line_color='yellow'))

fig11.update_layout(title_text="Daily change Liguria - Italy",
                  xaxis_rangeslider_visible=True)
fig11.update_yaxes(dtick=100)
In [23]:
fig12 = go.Figure()

#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['daily_cases_perc_change'], name="Daily UTI Liguria - Italy",
#                         line_color='red'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['daily_swab_perc_change'], name="Daily hospital Liguria - Italy",
#                         line_color='purple'))
fig12.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['detect_ratio'], name="Daily detect  ratio Liguria - Italy",
                         line_color='green'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['nuovi_positivi'], name="Daily new cases Liguria - Italy",
#                         line_color='blue'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_swabs'], name="Daily new swabs Liguria - Italy",
#line_color='black'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_recovered'], name="Daily new recovered Liguria - Italy",
#                         line_color='yellow'))

fig12.update_layout(title_text="Daily detect ratio Liguria - Italy",
                  xaxis_rangeslider_visible=True)
fig12.update_yaxes(dtick=20)
In [24]:
fig13 = go.Figure()

fig13.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['daily_cases_perc_change'], name="Daily cases perc change Liguria - Italy",
                         line_color='red'))
fig13.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['daily_swab_perc_change'], name="Daily swab perc change Liguria - Italy",
                         line_color='purple'))


fig13.update_layout(title_text="Daily percentual change Liguria - Italy",
                  xaxis_rangeslider_visible=True)
fig13.update_yaxes(dtick=100)
In [25]:
#print(data_national.dtypes)
In [26]:
import pandas as pd
import numpy as np
import itertools
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, acf, pacf,arma_order_select_ic
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_model import ARIMA
import warnings
#Librerie di base
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import pyplot
from matplotlib.pyplot import figure
import plotly.tools as tls
import math
import statistics as st
import seaborn as sns 
from io import StringIO
import plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import pylab as pl
import scipy.stats as scs
from itertools import product                    # some useful functions
from tqdm import tqdm_notebook
import time
import timeit
import pytest
import os
import pyarrow

#Pacchetto Sklearn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle
from sklearn.svm import SVC

#Per Modello XGBoost
import xgboost as xgb
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt 

import category_encoders as ce
warnings.simplefilter('ignore')
In [27]:
ds=pd.DataFrame()
ds['data']=data_national['data']
ds['new']=data_national['new_cases']
ds.set_index('data')
ds = ds.dropna()
In [28]:
model = ARIMA(ds['new'], order=(1,1,4))
model_fit = model.fit(disp=0)
print(model_fit.summary())
# plot residual errors
residuals = pd.DataFrame(model_fit.resid)
residuals.plot()
pyplot.show()
residuals.plot(kind='kde')
pyplot.show()
print(residuals.describe())
forecast = model_fit.forecast(steps=10)[0]
print(forecast)
                             ARIMA Model Results                              
==============================================================================
Dep. Variable:                  D.new   No. Observations:                  259
Model:                 ARIMA(1, 1, 4)   Log Likelihood               -2164.976
Method:                       css-mle   S.D. of innovations           1012.704
Date:                Wed, 11 Nov 2020   AIC                           4343.951
Time:                        09:13:07   BIC                           4368.849
Sample:                             1   HQIC                          4353.962
                                                                              
===============================================================================
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const         145.6463     63.810      2.283      0.022      20.582     270.711
ar.L1.D.new     0.4105      0.065      6.282      0.000       0.282       0.539
ma.L1.D.new    -0.5576      0.043    -13.037      0.000      -0.641      -0.474
ma.L2.D.new    -0.3785      0.055     -6.931      0.000      -0.486      -0.272
ma.L3.D.new    -0.3398      0.040     -8.603      0.000      -0.417      -0.262
ma.L4.D.new     0.8795      0.033     26.900      0.000       0.815       0.944
                                    Roots                                    
=============================================================================
                  Real          Imaginary           Modulus         Frequency
-----------------------------------------------------------------------------
AR.1            2.4363           +0.0000j            2.4363            0.0000
MA.1           -0.7104           -0.7952j            1.0663           -0.3660
MA.2           -0.7104           +0.7952j            1.0663            0.3660
MA.3            0.9036           -0.4285j            1.0000           -0.0705
MA.4            0.9036           +0.4285j            1.0000            0.0705
-----------------------------------------------------------------------------
                 0
count   259.000000
mean      1.211176
std    1022.180413
min   -5183.508085
25%    -300.897327
50%    -111.175297
75%     149.006189
max    6058.733262
[40311.50912286 39317.99407512 34145.04785194 36994.38367604
 38249.79824689 38850.96489769 39183.58607297 39405.97889259
 39583.12689709 39741.703516  ]
In [ ]:
 
In [29]:
X = ds['new'].values
size = int(len(X) * 0.999)
train, test = X[0:size], X[size:len(X)]
history = [x for x in train]
predictions = list()
for t in range(len(test)):
	model = ARIMA(history, order=(1,1,4))
	model_fit = model.fit(disp=0)
	output = model_fit.forecast()
	yhat = output[0]
	predictions.append(yhat)
	obs = test[t]
	history.append(obs)
	print('predicted=%f, expected=%f' % (yhat, obs))
error = mean_squared_error(test, predictions)
print('Test MSE: %.3f' % error)
# plot
#pyplot.plot(test)
#pyplot.plot(predictions, color='red')
#pyplot.show()
predicted=28989.740257, expected=35090.000000
Test MSE: 37213168.935
In [30]:
forecast = model_fit.forecast(steps=2)[0]
print(forecast)
pyplot.plot(X)
pyplot.plot(forecast, color='red')
pyplot.show()
[28989.74025672 30970.36850818]
In [31]:
'''
import warnings
from pandas import read_csv
from pandas import datetime
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error

# evaluate an ARIMA model for a given order (p,d,q)
def evaluate_arima_model(X, arima_order):
	# prepare training dataset
	train_size = int(len(X) * 0.99)
	train, test = X[0:train_size], X[train_size:]
	history = [x for x in train]
	# make predictions
	predictions = list()
	for t in range(len(test)):
		model = ARIMA(history, order=arima_order)
		model_fit = model.fit(disp=0)
		yhat = model_fit.forecast()[0]
		predictions.append(yhat)
		history.append(test[t])
	# calculate out of sample error
	error = mean_squared_error(test, predictions)
	return error

# evaluate combinations of p, d and q values for an ARIMA model
def evaluate_models(dataset, p_values, d_values, q_values):
	#dataset = dataset.astype('float32')
	best_score, best_cfg = float("inf"), None
	for p in p_values:
		for d in d_values:
			for q in q_values:
				order = (p,d,q)
				try:
					mse = evaluate_arima_model(dataset, order)
					if mse < best_score:
						best_score, best_cfg = mse, order
					print('ARIMA%s MSE=%.3f' % (order,mse))
				except:
					continue
	print('Best ARIMA%s MSE=%.3f' % (best_cfg, best_score))

# load dataset


# evaluate parameters
p_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
d_values = range(0, 10)
q_values = range(0, 10)
#warnings.filterwarnings("ignore")
evaluate_models(ds['new'].values, p_values, d_values, q_values)
'''
Out[31]:
'\nimport warnings\nfrom pandas import read_csv\nfrom pandas import datetime\nfrom statsmodels.tsa.arima_model import ARIMA\nfrom sklearn.metrics import mean_squared_error\n\n# evaluate an ARIMA model for a given order (p,d,q)\ndef evaluate_arima_model(X, arima_order):\n\t# prepare training dataset\n\ttrain_size = int(len(X) * 0.99)\n\ttrain, test = X[0:train_size], X[train_size:]\n\thistory = [x for x in train]\n\t# make predictions\n\tpredictions = list()\n\tfor t in range(len(test)):\n\t\tmodel = ARIMA(history, order=arima_order)\n\t\tmodel_fit = model.fit(disp=0)\n\t\tyhat = model_fit.forecast()[0]\n\t\tpredictions.append(yhat)\n\t\thistory.append(test[t])\n\t# calculate out of sample error\n\terror = mean_squared_error(test, predictions)\n\treturn error\n\n# evaluate combinations of p, d and q values for an ARIMA model\ndef evaluate_models(dataset, p_values, d_values, q_values):\n\t#dataset = dataset.astype(\'float32\')\n\tbest_score, best_cfg = float("inf"), None\n\tfor p in p_values:\n\t\tfor d in d_values:\n\t\t\tfor q in q_values:\n\t\t\t\torder = (p,d,q)\n\t\t\t\ttry:\n\t\t\t\t\tmse = evaluate_arima_model(dataset, order)\n\t\t\t\t\tif mse < best_score:\n\t\t\t\t\t\tbest_score, best_cfg = mse, order\n\t\t\t\t\tprint(\'ARIMA%s MSE=%.3f\' % (order,mse))\n\t\t\t\texcept:\n\t\t\t\t\tcontinue\n\tprint(\'Best ARIMA%s MSE=%.3f\' % (best_cfg, best_score))\n\n# load dataset\n\n\n# evaluate parameters\np_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\nd_values = range(0, 10)\nq_values = range(0, 10)\n#warnings.filterwarnings("ignore")\nevaluate_models(ds[\'new\'].values, p_values, d_values, q_values)\n'
In [32]:
#Best ARIMA(1, 1, 4) MSE=154507.826
In [33]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.tsa.api import VAR
In [34]:
dat= data_national.drop(['note','stato','variazione_totale_positivi','daily_cases_perc_change','daily_swab_perc_change',
                        'daily_unique_tested_perc_change','detect_ratio_swabs','detect_ratio_cases'], axis = 1).fillna(999999999)
#dat=data_national.fillna(-9999)
dat.set_index('data')
dat = dat.drop(['data'], axis = 1)
dat.dtypes
Out[34]:
ricoverati_con_sintomi            int64
terapia_intensiva                 int64
totale_ospedalizzati              int64
isolamento_domiciliare            int64
totale_positivi                   int64
nuovi_positivi                    int64
dimessi_guariti                   int64
deceduti                          int64
casi_da_sospetto_diagnostico    float64
casi_da_screening               float64
totale_casi                       int64
tamponi                           int64
casi_testati                    float64
new_cases                       float64
new_deaths                      float64
new_recovered                   float64
new_swabs                       float64
new_unique_tested               float64
dtype: object
In [35]:
model = VAR(dat)
model_fit = model.fit()
#model_fit.summary()
In [36]:
pred = model_fit.forecast(model_fit.y, steps=5)
print(pred)
[[3.00936610e+04 3.18328498e+03 3.32769459e+04 5.83550056e+05
  6.16827002e+05 4.12716333e+04 3.76927709e+05 4.29707455e+04
  2.07382531e+07 2.04596872e+07 1.03672762e+06 1.80019263e+07
  3.56457050e+07 4.12646215e+04 6.40745524e+02 1.39047090e+04
  2.61730327e+05 2.98428349e+04]
 [3.18982020e+04 3.43663873e+03 3.53348407e+04 6.11493385e+05
  6.46828225e+05 4.35671989e+04 3.89763344e+05 4.36780620e+04
  9.32198181e+07 9.29491738e+07 1.08027047e+06 1.82599848e+07
  6.81406107e+07 4.35428466e+04 7.07316435e+02 1.28356354e+04
  2.58058521e+05 2.47669536e+07]
 [3.38663874e+04 3.71463361e+03 3.75810210e+04 6.39810550e+05
  6.77391571e+05 4.48392923e+04 4.03249059e+05 4.44388341e+04
  1.98220024e+08 1.97963842e+08 1.12508025e+06 1.85103578e+07
  1.05666310e+08 4.48097772e+04 7.60772171e+02 1.34857142e+04
  2.50372991e+05 5.71270093e+07]
 [3.59612865e+04 4.01374561e+03 3.99750321e+04 6.68784921e+05
  7.08759954e+05 4.66653841e+04 4.17685175e+05 4.52691896e+04
  3.01372595e+08 3.01130144e+08 1.17171523e+06 1.87619630e+07
  1.50172263e+08 4.66349815e+04 8.30355442e+02 1.44361161e+04
  2.51605196e+05 9.45181846e+07]
 [3.82069110e+04 4.33715102e+03 4.25440620e+04 6.98967021e+05
  7.41511083e+05 4.90303479e+04 4.33023853e+05 4.61774809e+04
  4.03308167e+08 4.03078743e+08 1.22071341e+06 1.90198196e+07
  2.00424181e+08 4.89981848e+04 9.08291283e+02 1.53386781e+04
  2.57856589e+05 1.38894861e+08]]
In [37]:
# model fitting
#model = VAR(dat)
#results = model.fit(maxlags=30, ic='aic')
#results.summary()
In [38]:
# forecasting
#lag_order = results.k_ar
#results.forecast(x_train4.values[-lag_order:], 5)
In [39]:
# plotting
plt.figure(figsize=(300, 300))
model_fit.plot_forecast(10)
Out[39]:
<Figure size 21600x21600 with 0 Axes>
In [40]:
# Evaluation
#fevd = model_fit.fevd(5)
#fevd.summary()
In [ ]: